library(dplyr)
library(forcats)
library(ggplot2)
library(stringr)
library(tidyr)
library(wesanderson)
RMSEs from Time Series CV on Training Data
AGU manuscript
Set Up
Load packages:
Specify file path:
# Base path that is prepended (via paste0) to every results file read or
# written below, so the trailing slash is required.
fp <- "~/../../Volumes/subseasonal_extreme/agu-manuscript-code/EESNs/"
RMSEs
Data Steps
Load and clean RMSEs:
# Read the time-series CV results and recover the tuning parameters that are
# encoded in the `file` column of each row.
cv_rmses <-
  readr::read_csv(
    file = paste0(fp, "01-tuning-time-series-cv/results/cv-rmses.csv"),
    show_col_types = FALSE
  ) |>
  # Strip only a literal, trailing ".csv"; an unescaped "." is a regex
  # wildcard and would match any character followed by "csv".
  mutate(file = str_remove(file, "\\.csv$")) |>
  # separate()'s default `sep` splits on runs of non-alphanumeric characters,
  # giving one piece per parameter. `remove = FALSE` keeps `file` as well.
  separate(
    file,
    into = c("target_region", "tau", "m", "nh", "nu"),
    remove = FALSE
  ) |>
  # Each piece still carries its label prefix; drop it and convert to numeric.
  mutate(
    target_region = str_remove(target_region, "targetregion"),
    tau = as.numeric(str_remove(tau, "tau")),
    m = as.numeric(str_remove(m, "m")),
    nh = as.numeric(str_remove(nh, "nh")),
    # nu is rescaled by 1/100 after parsing (presumably it is stored as an
    # integer percentage in the file name -- confirm against the tuning code).
    nu = as.numeric(str_remove(nu, "nu")) / 100
  )
Overall Best
Determine which parameters resulted in the lowest RMSEs:
# For every target region and tau, keep the row(s) whose RMSE equals the
# group minimum. Ties are all retained and the original row order is kept.
best_rmses <-
  cv_rmses |>
  filter(
    rmse == min(rmse, na.rm = TRUE),
    .by = c(target_region, tau)
  )
Print the best RMSEs:
# Render the best configurations as a table: both RMSE columns are rounded to
# two decimals, the `file` column is dropped, and rows are sorted by target
# region and tau. `best_rmses` itself is not modified.
best_rmses |>
  mutate(across(c(rmse, rmse_extreme), \(x) round(x, 2))) |>
  select(-file) |>
  arrange(target_region, tau) |>
  knitr::kable()
| target_region | tau | m | nh | nu | rmse | rmse_extreme |
|---|---|---|---|---|---|---|
| MW | 1 | 0 | 50 | 0.35 | 2.51 | 3.49 |
| MW | 2 | 1 | 50 | 0.10 | 2.93 | 4.38 |
| MW | 3 | 2 | 50 | 0.85 | 2.98 | 4.48 |
| MW | 4 | 4 | 50 | 0.85 | 3.00 | 4.55 |
| NE | 1 | 0 | 50 | 0.35 | 2.36 | 3.36 |
| NE | 2 | 1 | 50 | 0.10 | 2.68 | 4.07 |
| NE | 3 | 2 | 50 | 0.85 | 2.71 | 4.17 |
| NE | 4 | 4 | 50 | 0.85 | 2.74 | 4.21 |
| SE | 1 | 0 | 50 | 0.10 | 2.09 | 3.07 |
| SE | 2 | 1 | 50 | 0.85 | 2.36 | 3.63 |
| SE | 3 | 2 | 50 | 0.85 | 2.38 | 3.69 |
| SE | 4 | 4 | 50 | 0.85 | 2.40 | 3.73 |
| SW | 1 | 0 | 50 | 0.10 | 1.94 | 2.89 |
| SW | 2 | 0 | 50 | 0.85 | 2.06 | 3.16 |
| SW | 3 | 3 | 50 | 0.85 | 2.06 | 3.20 |
| SW | 4 | 4 | 50 | 0.85 | 2.06 | 3.22 |
| W | 1 | 0 | 50 | 0.10 | 2.32 | 3.37 |
| W | 2 | 4 | 50 | 0.60 | 2.44 | 3.70 |
| W | 3 | 4 | 50 | 0.85 | 2.44 | 3.71 |
| W | 4 | 4 | 50 | 0.85 | 2.44 | 3.72 |
Save best RMSEs:
# Write the (unrounded) best rows to the same results directory the CV RMSEs
# were read from. Row names are omitted from the CSV.
write.csv(
  best_rmses,
  file = paste0(fp, "01-tuning-time-series-cv/results/cv-rmses-best.csv"),
  row.names = FALSE
)
Plot the best RMSEs:
Plot the corresponding RMSEs on extremes:
Best By Tuning Parameter
Determine lowest RMSEs for each value of nu, input vars, forecast horizon, and target region:
# Within every (target region, tau, nu) combination, keep the row(s) with the
# smallest overall RMSE and flag them with `best_rmse = TRUE`. The
# extremes-only RMSE column is not carried along.
best_rmses_by_param <-
  cv_rmses |>
  filter(rmse == min(rmse, na.rm = TRUE), .by = c(target_region, tau, nu)) |>
  select(!rmse_extreme) |>
  mutate(best_rmse = TRUE)
# Same selection as for the overall RMSE, but ranked on `rmse_extreme`:
# within every (target region, tau, nu) combination keep the row(s) with the
# smallest extremes-only RMSE. The overall `rmse` column is dropped first.
# The flag column is still called `best_rmse` here.
best_rmses_extremes_by_param <-
  cv_rmses |>
  select(-rmse) |>
  filter(
    rmse_extreme == min(rmse_extreme, na.rm = TRUE),
    .by = c(target_region, tau, nu)
  ) |>
  mutate(best_rmse = TRUE)
Plot the best RMSEs for each set of model iterations:
Plot the best RMSEs for each set of model iterations (considering extremes only):
Relationship to Tuning Params
Plot of RMSEs (training data):
Plot of RMSEs for extreme observations only (testing data):